# ---------------------------------------------------------
# TUM - Technichal University of Munich
#
# Original Authors: Taylor Jones
#                   Magdalena Altmann
# Further Authors:  Adrian Wenzel
# Date: 2020-05-06
# Purpose: This source code intends to re-estimate CH4
#          emissions in Munich using a top-down approach.
#          This file provides functions that are used by
#          the main source file.
# Comment: This source code has been developed in 2019,
#          however the date above indicates the first
#          submit to version control.
# ---------------------------------------------------------


#Get footprint information:
get_footprint_info <- function(footprint_file){
  require(ncdf4)
  nc <- nc_open(footprint_file)
  recep_times <- as.POSIXct( ncvar_get(nc,"recep_time"), origin="1970-01-01 00:00:00",tz="GMT")
  bkgd_times  <- as.POSIXct( ncvar_get(nc,"back_time"),  origin="1970-01-01 00:00:00",tz="GMT")
  delta_r     <- recep_times[[2]] - recep_times[[1]]
  delta_b     <- bkgd_times[[2]]  - bkgd_times[[1]]
  date        <- strftime( recep_times[1], format = "%Y%m%d", tz = "GMT")
  output      <- list( "recep_times" = recep_times, "bkgd_times" = bkgd_times, "delta_r" = delta_r, "delta_b" = delta_b , "date" = date )
  return(output)
}

#load em27 observations:
load_observations <- function(obs_file,ems){
  require(zoo)
  obs_df <- read.csv( obs_file )
  tstamp <- as.POSIXct( obs_df$year_day_hour, tz="GMT")
  obs_df <- obs_df[-1]
  obs_df <- zoo(obs_df,order.by = tstamp )
  obs_df2 <- data.frame()
  for( em in ems){
    #x    <- index( obs_df )
    #y    <- obs_df[,paste0(em,"_xch4_sc")]
    #x_sm <- spline(x,y)$x
    #y_sm <- spline(x,y)$y
    
    x_sm <- index( obs_df )
    y_sm <- obs_df[,paste0(em,"_xch4_sc")]
    
    #apply scaling factor to each instrument if needed
    if (apply_scaling == TRUE){
      if (em == "mb"){
        y_sm <- y_sm*1
      } else if (em == "ma"){
        y_sm <- y_sm*1.000016718597312
      } else if (em == "ki"){
        y_sm <- y_sm*0.999716319813758
      } else if (em == "kj"){
        y_sm <- y_sm*0.999224284699808
      } else if (em == "wa"){
        y_sm <- y_sm*0.999968660826879
      }
    }

    
    #obs_df2 <- rbind( obs_df2, data.frame(date=date, recep=index(obs_df), em=em,species = "XCO2",value=obs_df[,paste0(em,"_xco2_sc")]))
    #obs_df2 <- rbind( obs_df2, data.frame(date=date, recep=x, em=em,species = "XCH4", value=y, y_sm = y_sm)) 
    obs_df2 <- rbind( obs_df2, data.frame(date=date, recep=x_sm, em=em,species = "XCH4", value=y_sm )) #, y_sm = y_sm)) 
  }
  return(obs_df2)
}


#load backgrounds:
load_bims <- function(footprint_file,ems){
  fp_info <- get_footprint_info(footprint_file)
  bim_df <- data.frame()                                             # all of the background influence matricies for all ems. 
  for(em in ems){                                                    # loop through the em designators
    bim <- raster(foot_file,varname=paste(em,"BIM"))                 # load the background influence matrix
    bim_ar           <- as.array(bim)                                # convert it to an array
    sub_bim_df       <- melt(bim_ar*as.numeric(fp_info$delta_b)*60)  # make it a dataframe
    # sub_bim_df       <- sub_bim_df[ sub_bim_df$value != 0 ,]         # drop everywhere with no influence
    sub_bim_df$bkgd  <- fp_info$bkgd_times[  sub_bim_df$Var2 ]       # pull the background times
    sub_bim_df$recep <- fp_info$recep_times[ rev(sub_bim_df$Var1) ]  # pull the receptor times
    sub_bim_df       <- sub_bim_df[ sub_bim_df$value != 0 ,]         # drop everywhere with no influence
    sub_bim_df$em    <- em                                           # add the em designator
    sub_bim_df       <- sub_bim_df[ -c(1,2,3) ]                      # drop the index columns
    for( r in unique( sub_bim_df$recep) ){                           # Loop through receptor times to make sure background PDFs add up to one.
      total_bk <- sum( sub_bim_df$value[ sub_bim_df$recep == r ] )
      sub_bim_df$value[ sub_bim_df$recep==r ]  <- sub_bim_df$value[ sub_bim_df$recep==r ]/total_bk
    }
    bim_df <- rbind(bim_df,sub_bim_df)                               # add the BIM for this em to the total data frame
  }
  bim_df$date <- fp_info$date
  return(bim_df)
}

#load the observations and their associated footprints:
create_y_df <- function(footprint_file,obs_file,ems,sectors,ras_stack){
  require(reshape2) 
  fp_info <- get_footprint_info(footprint_file)
  obs_df <- load_observations(obs_file,ems)
  y_df   <- data.frame()                           # data frame of valid observations

  for(em in ems){                                                       # loop through the em designators
    fp <- stack(foot_file,varname=paste(em,"foot"))                     # load the footprint
    for(r_idx in seq(1,length(fp_info$recep_times))){                   # loop through the receptor times
      r <- fp_info$recep_times[r_idx]                                   # receptor time
      r_posix <- as.POSIXct(r,origin="1970-01-01 00:00:00",tz="GMT")    # receptor time as posix object
      sdf <- obs_df[ (obs_df$recep > r) &                               # subset the observations within this window
                     (obs_df$recep < r + fp_info$delta_r) & 
                     (obs_df$em == em ) , ]
      ob <- mean( sdf$value )                                           # mean methane observation for that time
        for( s in seq(1,length(sectors))){                              # loop through the sectors
            sub_fp     <- fp[[r_idx]]                                   # select the footprint from the stack
            prior_flux <- cellStats( ras_stack[[s]]*sub_fp, 'sum' )     # multiply the footprint by the emissions map and add up all the boxes.
            y_df <- rbind(y_df, data.frame(em=em, 
                                           recep=r_posix, 
                                           ob=ob, 
                                           fp=prior_flux, 
                                           sector=sectors[s] ))
        }
    }
  }
  y_df$date <- fp_info$date 
  return(y_df)
}





# Merge the BIM and y_df into matricies for the inversion: --------------------------------------------------------------------------------------
merge_and_invert <- function(y_df,bim_df,sigma_sector_priors,sigma_bkgd_prior,t_back,sigma_obs_prior,bkgd_prior=1.84,use_transport_error=FALSE){
  require(tidyr)
  require(Matrix)
  sectors      <- unique(y_df$sector)
  bk_times     <- unique(bim_df$bkgd) 
  bim_df$index <- seq(1, dim(bim_df)[1] )
  
  n_sec     <- length(sectors)  # number of sectors
  n_bktimes <- length(bk_times) # number of background times
  x_prior   <- c( rep(1,n_sec) , rep(bkgd_prior, n_bktimes ) )  # the prior state vector
  
  A_df  <- spread(y_df,sector,fp)                 # A is the jacobian of footprints
  B_df  <- dcast( bim_df, recep + em ~ bkgd,      # B is the jacobian of background influence
                  fun.aggregate = sum) 
  AB_df  <- merge(A_df,B_df,by=c("em","recep"))    # AB is the merge of A and B.
  AB_df  <- AB_df[ !is.na( AB_df$ob ), ]           # drop times/sites where there is a footprint but no em27 data
  out_df <- AB_df[ , 1:(6+n_sec) ]                 # I'll add to this later for the output
  
  K <- data.matrix( AB_df[ -c(1,2,3,4,5,6) ] )  # K is the full jacobian used in the inversion:  K  =  [ A  B ]
  y <- AB_df$ob                             # y is the observation vector
  
  # S_p is the Prior Error Covariance Matrix:
  S_p_A <- expand.grid(sectors,sectors)
  for( sector in sectors){
    S_p_A$value[ (S_p_A$Var1 == sector) & (S_p_A$Var2 == sector )] <- sigma_sector_priors[[sector]]^2
  }
  S_p_upper_left  <- acast( S_p_A , Var1 ~ Var2) # 
  
  S_p_B           <- expand.grid(bk_times,bk_times)
  S_p_B$value     <- (sigma_bkgd_prior^2)*( exp(-1*abs(as.numeric(S_p_B$Var1) - as.numeric(S_p_B$Var2))/t_back) )
  S_p_lower_right <- acast( S_p_B , Var1 ~ Var2 )
  S_p             <- bdiag(S_p_upper_left,S_p_lower_right) %>% data.matrix()
  
  # S_err is the Model-Observation Error Covariance Matrix
  A_df_un <- unite(AB_df,"ob_name",c(1,2))
  S_err   <- expand.grid(A_df_un$ob_name, A_df_un$ob_name)
  S_err$value[ S_err$Var1 == S_err$Var2] <- sigma_obs_prior^2
  if(use_transport_error){
    S_err$value[ S_err$Var1 == S_err$Var2] <- S_err$value[ S_err$Var1 == S_err$Var2] + (A_df_un$te_sd*1e-3)^2
  }
  S_err <- acast( S_err , Var1 ~ Var2 ) 
  S_err <- replace_na( S_err, 0 )
  S_p   <- replace_na( S_p,   0 )
  
  # Inversion
  G <- S_p %*% t(K) %*% solve( K %*% S_p %*% t(K) + S_err)     # G is the Gain matrix
  x_hat <- x_prior + G %*% (y - K %*% x_prior)                 # x_hat is the posterior 
  S_pos <- solve( t(K) %*% solve( S_err ) %*% K + solve(S_p) ) # S_pos is the posterior error covarience matrix
  #Fisher <- t(K) %*% solve( S_err ) %*% K                     # Fisher information
  
  # A <- G %*% K                                                 # Averaging kernel matrix
  
  pos_bk_sigma <- sqrt( diag(S_pos) )  # in ppm
  pos_bk_sigma <- pos_bk_sigma[ (n_sec+1) : length(pos_bk_sigma) ]
  
  out_df$bk_err <-sqrt( K %*% (  c( rep(0,n_sec) , pos_bk_sigma )^2 ) )
  
  bk_df <- data.frame(t=bk_times,b_hat=x_hat[(n_sec+1):length(x_hat)], date=date, prior_bk_sigma = sigma_bkgd_prior, pos_bk_sigma = pos_bk_sigma )
  
  out_df$y_hat <- K %*% x_hat                # posterior ch4 concentration values
  x_bk <- x_hat
  x_bk[1:n_sec] <- 0
  out_df$y_bk <- K %*% x_bk                  # posterior background
  
  
  #[Magda] forward model result using prior emission scaling factor (x_prior)----------------------------------------------------------------------------
  out_df$y_prior <- K %*% x_prior            # prior expected ch4 concentration values
  
  for( sector in sectors){
    x <- rep( 0,length(x_hat) )
    x[ which(sectors==sector) ] <- x_hat[ which(sectors==sector) ]
    out_df[ , paste0('pos_',sector)] <- K %*% x
  }
  
  output <- list( "df" = out_df, "bk_df"=bk_df, "x_hat" = x_hat, "S_pos" = S_pos)
}



# Implement Bootstrap: ---------------------------------------------------------------------------------------------------------------------------
do_bootstrap <- function(df, sector,n_times = 10000, bs_x_err=1e-2, bs_y_err=1e-2){
  df$x      <- df[ , sector ]
  df$y_else <- df$y_hat - df[ , paste0('pos_',sector) ] 
  df$y      <- df$ob - df$y_else
  
  n <- dim(df)[1]  # number of points to sample
  bs_sf <- NULL       # this will be a list of all the scaling factors 

  for(i in 1:n_times){
    idx    <- sample(1:n, n, replace=T)        # list of random selections
    df_sub <- df[idx,]                         # dataframe containing those points
    err_x  <- rnorm(n,0,bs_x_err)              # model + prior error [ppm]
    err_y  <- rnorm(n,0,bs_y_err)              # measurement error   [ppm]
    x      <- df_sub$x + err_x                 # add the error in the x
    y      <- df_sub$y + err_y                 # add the error in the y
    fit    <- lm(y ~ x)                        # fit a line to the scatterplot
    bs_sf  <- c(bs_sf, fit$coefficients[[2]] ) # scaling factor is the slope of that line
  }
  return( bs_sf )
}
